
****************************************************************************
* Part A: Prepare index worker sample
****************************************************************************

* ---------------------------------------------------------------------------
* Builds the index-worker sample. Starts from the coworker file, attaches
* individual covariates, turnout files and relatives' covariates by PersonId,
* applies the sample restrictions, and writes
* Data/data_index_workers_X_18_65_years.
* ---------------------------------------------------------------------------
use Data/data_coworkers, clear  // Contains PersonId WorkplaceId WP_HPG_id
* force: any duplicate PersonId-WorkplaceId rows are dropped even if they
* differ on other variables
duplicates drop PersonId WorkplaceId, force

* Individual covariates: only workers present in both files are kept
merge 1:1 PersonId using Data/data_ind_X_2017, keep(match) nogenerate

* 2010 turnout: every worker is kept, rows found only in the vote file are not.
* Missing turnout is recoded to 0 and flagged by the companion indicator.
merge 1:1 PersonId using Data/data_voting_2010, keep(master match) nogenerate
rename Voted2010 IndexWorkerVoted2010
gen IndexWorkerVoted2010Miss = missing(IndexWorkerVoted2010)
replace IndexWorkerVoted2010 = 0 if IndexWorkerVoted2010Miss == 1

* 2018 turnout; the district variable is renamed right away, before the next
* merge is performed
merge 1:1 PersonId using Data/data_voting_2018, keep(master match) nogenerate
rename elec_district elec_district2018

* 2022 turnout
merge 1:1 PersonId using Data/data_voting_2022, keep(master match) nogenerate

* Relatives' covariates
merge 1:1 PersonId using Data/data_relatives_X, keep(master match) nogenerate

* Checkpoint: write the merged file to disk and reload it
save Data/tmp_data_index_workers_r50, replace
use Data/tmp_data_index_workers_r50, clear

* Sample restrictions
keep if !missing(Voted2018, Voted2022)
keep if !missing(Relative_Voted2010)
* Note: a missing Income is not < 1000 in Stata, so such rows are retained
drop if Foreign == 1 | Income < 1000

* Remove workers with relatives at the same workplace
merge 1:1 PersonId WorkplaceId using Data/data_coworkers_with_relatives_who_are_coworkers, ///
    keep(master match) nogenerate
drop if HasACoworkerWhoIsARelative == 1
drop HasACoworkerWhoIsARelative

* Age restriction (a missing Age2018 fails inrange() and is dropped as well)
drop if !inrange(Age2018, 18, 65)

* Drop singleton peer groups: group size is counted after all restrictions above
egen WP_NumWorkers = count(PersonId), by(WP_HPG_id)
keep if WP_NumWorkers > 1

* Number of distinct peer groups per firm identifier.
* NOTE(review): nvals() is not a built-in egen function; presumably it comes
* from the user-written egenmore package - confirm it is installed.
egen num_HPG_at_firm = nvals(WP_HPG_id), by(WP_P0846_lopnr_PeOrgNr)

save Data/data_index_workers_X_18_65_years, replace

********************************************************************************
* Part B: Collapse coworker peer characteristics (HPG_Coworkers-level averages)
********************************************************************************

* ---------------------------------------------------------------------------
* Collapses the index-worker file to one row per peer group (WP_HPG_id):
* group means of worker and relative covariates, the number of rows with a
* non-missing firm identifier, and the summed NumberOfRelatives. The means are
* then prefixed with WP_ and saved to Data/data_coworker_X_18_65_years.
* ---------------------------------------------------------------------------
use Data/data_index_workers_X_18_65_years, clear

* Drop variables not used in peer averages
drop PersonId elec_district2018 IndexWorkerVoted2010 IndexWorkerVoted2010Miss ///
     Voted2022 Age2017 Age2018 Age2022 deso num_HPG_at_firm

* Note: (mean) averages over non-missing values only, so the effective
* denominator can differ across variables within the same group.
collapse (mean)  AstKommun Income NumberOfChildren Married Voted2018 Age2010 Relative_* ///
         (count) WP_NumWorkers   = WP_P0846_lopnr_PeOrgNr                              ///
         (sum)   WP_NumRelatives = NumberOfRelatives,                                  ///
         by(WP_HPG_id)

* Rename collapsed means. The variables are listed explicitly instead of using
* a positional range (first-last), so the result does not depend on the order
* in which the variables happen to be stored after collapse. At this point the
* only Relative_* variables left in memory are the collapsed means.
foreach v of varlist AstKommun Income NumberOfChildren Married Voted2018 Age2010 Relative_* {
    rename `v' WP_`v'
}

order WP_HPG_id WP_NumWorkers WP_AstKommun WP_Income WP_Voted2018 WP_Relative_Voted2010

save Data/data_coworker_X_18_65_years, replace

********************************************************************************
* Part C: Merge peer means and adjust by subtracting own value
********************************************************************************

* ---------------------------------------------------------------------------
* Attaches the peer-group means (WP_*) to every index worker and converts them
* into leave-one-out means, i.e. the mean over the worker's peers excluding
* the worker's own value.
* ---------------------------------------------------------------------------
use Data/data_index_workers_X_18_65_years, clear

merge m:1 WP_HPG_id using Data/data_coworker_X_18_65_years
drop _merge

* Adjust WP_* variables to exclude own value.
*
* The stored group means were computed over NON-MISSING values only, so
* multiplying them by WP_NumWorkers (the full group size) does not recover the
* group sum whenever some member has a missing value. The group total and the
* count of non-missing values are therefore computed per variable from the
* rows in memory, which are the same rows the means were collapsed from.
*
* - Own value observed:  (group total - own) / (non-missing count - 1).
*   If the worker is the only member with an observed value the denominator
*   is 0 and Stata returns missing, i.e. no peer mean is defined.
* - Own value missing:   the stored mean already excludes this worker and is
*   left unchanged.
*
* NOTE(review): WP_AstKommun and WP_NumberOfChildren are not adjusted here and
* therefore still include the worker's own value - confirm this is intended.
foreach v in Income Voted2018 Married Age2010 Relative_Voted2010 Relative_Income Relative_Female Relative_Age2010 Relative_EduLessHighSchool Relative_EduHighSchool Relative_EduCollege Relative_EduMiss Relative_Married Relative_NumberOfChildren {

    tempvar grp_total grp_nonmiss
    egen double `grp_total'   = total(`v'), by(WP_HPG_id)
    egen long   `grp_nonmiss' = count(`v'), by(WP_HPG_id)

    replace WP_`v' = (`grp_total' - `v') / (`grp_nonmiss' - 1) if !missing(`v')

    drop `grp_total' `grp_nonmiss'
}

* ---------------------------------------------------------------------------
* Attaches firm, parental-vote, EU-2009-vote and parental covariates to the
* index workers. Every merge is a lookup: all rows in memory are kept and rows
* that exist only in the using file are discarded, so the sample is unchanged.
* ---------------------------------------------------------------------------

* Add firm characteristics (the key is renamed to match the firm file)
rename WP_P0846_lopnr_PeOrgNr P0846_lopnr_PeOrgNr
merge m:1 P0846_lopnr_PeOrgNr using Data/data_firm_X_2017
drop if _merge == 2
drop _merge

* Add parental and EU2009 vote
merge 1:1 PersonId using Data/data_ParentsVoted
drop if _merge == 2
drop _merge

merge 1:1 PersonId using Data/data_voting_EU_2009
drop if _merge == 2
drop _merge

* Add parental characteristics.
* Using-only rows are dropped here as for the other lookups; otherwise persons
* that appear only in data_Parents_X would be appended to the sample with
* every other variable missing.
merge m:1 PersonId using Data/data_Parents_X
drop if _merge == 2
drop _merge

* ---------------------------------------------------------------------------
* Missing-value handling for parental covariates. For each variable a
* <name>Miss indicator (1 = value was system-missing ".") is created, then the
* missing entries are filled: 0 for the dummy variables, the mean of the
* observed values for the continuous ones.
* NOTE(review): the variable prefixes are "Fathers" and "Mother" (no trailing
* "s"), exactly as in the original code - confirm they match the names in
* data_Parents_X.
* ---------------------------------------------------------------------------

* Dummies: flag, then zero-fill
foreach parent_X in Married Married2 EduCollege {
    foreach who in Fathers Mother {
        local pvar `who'`parent_X'
        gen `pvar'Miss = (`pvar' == .)
        replace `pvar' = 0 if `pvar'Miss == 1
    }
}

* Continuous: flag, then fill with the mean over non-missing observations
foreach parent_X in Age2017 NumberOfChildren LogIncome Income {
    foreach who in Fathers Mother {
        local pvar `who'`parent_X'
        gen `pvar'Miss = (`pvar' == .)
        summarize `pvar', meanonly
        replace `pvar' = r(mean) if `pvar'Miss == 1
    }
}

save Data/data_main_analysis_coworker_peers_18_65_years, replace
